Learning Objectives

  • Set up online data repositories, such as Google Drive, and retrieve the data directly into RStudio for use.

  • Describe how the Grammar of Graphics deviates from built-in plotting commands.

  • Use an aes-thetic to define which components of the data will be used in constructing the graphic.

  • Apply geometric data layers to existing plots.

Data

For this example, we will use the Rice Rivers Center data again.

library( tidyverse )
# Read the example CSV straight from its raw GitHub URL into a tibble.
# NOTE(review): the slide text names the Rice Rivers Center data, but this URL
# points at arapat.csv -- confirm which data set is intended here.
url <- "https://raw.githubusercontent.com/dyerlab/ENVS-Lectures/master/data/arapat.csv"
data <- read_csv(url)

Components of Graphical Objects

  • An aesthetic statement indicating which columns of data to use and how to use them in the plot (designating x-axis vs color, etc.).

  • An estimate of a trendline through the data (the red one), which displays a statistical summary of the raw data.

  • A set of geometric overlays for the points which include size and shape configurations.

  • Specified color scheme for the regions.

  • Labeling of a subset of the data (which is done using a separate data.frame derived from the first).

  • Labels on axes.

  • A legend positioned in a specific fashion.

  • A title over the whole thing.

  • A theme for the rest of the coloring and customized lines and grids.

The Grammar of Graphics

  • Data

  • Aesthetics

  • Transformations

  • Partitions

  • Auxiliary Text

  • Overlays

The ggplot2 Library

R Packages for Data Science

  • RStudio + Hadley Wickham

  • Collection of Packages

  • Makes you AWESOME

library( ggplot2 )

The Aesthetics

An aesthetic is a function that allows you to tell the graphics which columns of data are to be used in the creation of graph features.

aes( x = Sepal.Length, y = Sepal.Width )
Aesthetic mapping: 
* `x` -> `Sepal.Length`
* `y` -> `Sepal.Width`

 

Commonly included within the initial call to ggplot()

ggplot( iris, aes( x = Sepal.Length, y = Sepal.Width ) )

Stepwise Creation of a Plot

ggplot( iris )

Stepwise Creation of a Plot

ggplot( iris, aes( x = Sepal.Length) )

Stepwise Creation of a Plot

Adding a Geometry Layer

ggplot( iris, aes( x = Sepal.Length) ) + 
  geom_histogram()

Stepwise Creation of a Density Plot

ggplot( iris, aes( x = Sepal.Length) ) +
  geom_density() 

A Scatter Plot

ggplot( iris, aes( x = Sepal.Length, y = Sepal.Width) ) + geom_point() 

Scatterplot with Colors

Aesthetics also contribute to symbologies and colors

ggplot( iris, aes( x = Sepal.Length, 
                   y = Sepal.Width, 
                   color=Species) ) + 
  geom_point() 

In & Out of aes()

ggplot( iris ) + 
  geom_point(aes( x = Sepal.Length, y = Sepal.Width, col=Species), shape=5)

Iterative Building of Graphics

# Build the scatter plot one layer at a time. Each `+` returns a new ggplot
# object, so the partial result can be stored in `p` and extended later.
p <- ggplot( iris )
# Mapped aesthetics (x, y, colour, shape) go inside aes(); constant settings
# (size, alpha) go outside it and apply to every point.
p <- p + geom_point( aes( x = Sepal.Length, 
                          y = Sepal.Width, 
                          col=Species, 
                          shape=Species), 
                     size=3, 
                     alpha=0.75 ) 
p <- p + xlab("Sepal Length")
# The y aesthetic is Sepal.Width, so the axis label must say "Width".
p <- p + ylab("Sepal Width")
class(p)
[1] "gg"     "ggplot"

Printing out p

p

Scope of Visibility

Only things in ggplot() apply to all following components. Placing aes() or data= parts in later components only makes them visible to that particular component.

ggplot( iris, aes( x = Sepal.Length, y = Sepal.Width) ) + 
  geom_point() + 
  stat_smooth() 
ggplot( iris ) + 
  geom_point( aes( x = Sepal.Length, y = Sepal.Width) ) + 
  stat_smooth(  aes( x = Sepal.Length, y = Sepal.Width) )
ggplot() + 
  geom_point( aes( x = Sepal.Length, y = Sepal.Width), data = iris )  + 
  stat_smooth( aes( x = Sepal.Length, y = Sepal.Width), data = iris )  

 

p + theme_bw()

 

p + theme_bw()

 

p + theme_gray()

 

p + theme_linedraw()

 

p + theme_dark()

 

p + theme_minimal()

 

p + theme_classic()

 

p + theme_void()

Create Your Own Themes

source("theme_dyerlab_grey.R")
theme_dyerlab_grey
function (base_size = 18, base_family = "") 
{
    theme_grey(base_size = base_size, base_family = base_family) %+replace% 
        theme(axis.line = element_blank(), axis.text.x = element_text(size = base_size * 
            0.8, color = "white", lineheight = 0.9), axis.text.y = element_text(size = base_size * 
            0.8, color = "white", lineheight = 0.9), axis.ticks = element_line(color = "white", 
            size = 0.2), axis.title.x = element_text(size = base_size, 
            color = "white", margin = margin(0, 10, 0, 0)), axis.title.y = element_text(size = base_size, 
            color = "white", angle = 90, margin = margin(0, 10, 
                0, 0)), axis.ticks.length = unit(0.3, "lines"), 
            legend.background = element_rect(color = NA, fill = "#272822"), 
            legend.key = element_rect(color = "white", fill = "#272822"), 
            legend.key.size = unit(1.2, "lines"), legend.key.height = NULL, 
            legend.key.width = NULL, legend.text = element_text(size = base_size * 
                0.8, color = "white"), legend.title = element_text(size = base_size * 
                0.8, face = "bold", hjust = 0, color = "white"), 
            legend.position = "right", legend.text.align = NULL, 
            legend.title.align = NULL, legend.direction = "vertical", 
            legend.box = NULL, panel.background = element_rect(fill = "#272822", 
                color = NA), panel.border = element_rect(fill = NA, 
                color = "white"), panel.grid.major = element_line(color = "grey35"), 
            panel.grid.minor = element_line(color = "grey20"), 
            panel.spacing = unit(0.5, "lines"), strip.background = element_rect(fill = "grey30", 
                color = "grey10"), strip.text.x = element_text(size = base_size * 
                0.8, color = "white"), strip.text.y = element_text(size = base_size * 
                0.8, color = "white", angle = -90), plot.background = element_rect(color = "#272822", 
                fill = "#272822"), plot.title = element_text(size = base_size * 
                1.2, color = "white"), plot.margin = unit(rep(1, 
                4), "lines"))
}

Create Your Own Themes

p + theme_dyerlab_grey()

Font Sizes

p + theme_dyerlab_grey( base_size = 22)

Globally Set Font Sizes

You can also set the theme and base sizes (and other theme-related items) globally for an entire document/presentation as:

ggplot2::theme_set( theme_bw( base_size=16) )

Boxplots

ggplot( iris, aes( x = Sepal.Length) ) + 
  geom_boxplot( notch=TRUE )

Species Differences

ggplot( iris, aes(x=Species, y=Sepal.Length) )  + 
  geom_boxplot( notch=TRUE )

Species Differences Fill Colors

ggplot( iris, aes(x=Species, y=Sepal.Length) )  + 
  geom_boxplot( notch=TRUE, fill=c("#002145") ) +
  ylab("Sepal Length")

Species Differences Fill Colors

ggplot( iris, aes(x=Species, y=Sepal.Length) )  + 
  geom_boxplot( notch=TRUE, fill=c("#002145", "#a5acaf","#66c010") ) +
  ylab("Sepal Length")

Overlaying a Trendline

ggplot( iris, aes( x = Sepal.Length, y = Sepal.Width) ) + 
  geom_point()  + 
  stat_smooth()

Overlaying a Trendline

ggplot( iris, aes( x = Sepal.Length, y = Sepal.Width) ) + 
  geom_point()  + 
  stat_smooth( method="lm", formula = "y ~ x")

Stacking Order

ggplot( iris, aes( x = Sepal.Length, y = Sepal.Width) ) + 
  geom_point( color="red")  + 
  stat_smooth( fill="black", alpha=1)

Stacking Order

ggplot( iris, aes( x = Sepal.Length, y = Sepal.Width) ) + 
  stat_smooth( fill="black", alpha=1) + 
  geom_point( color="red")  

On-The-Fly Transformations

Customizing the y-axis data format…

# Histogram rescaled to density units so the kernel density curve can be
# overlaid on the same y-axis. after_stat(density) refers to the `density`
# variable computed by the histogram stat; it replaces the deprecated
# ..density.. notation.
ggplot( iris, aes(x = Sepal.Length) ) + 
  geom_histogram( aes( y = after_stat(density) ), 
                  color="green", 
                  fill="orange", bins = 15 ) + 
  geom_density( color = "magenta", lwd=1.5 )  

Textual Overlays

cor_model <- cor.test( iris$Sepal.Length, iris$Sepal.Width)
cor_model

    Pearson's product-moment correlation

data:  iris$Sepal.Length and iris$Sepal.Width
t = -1.4403, df = 148, p-value = 0.1519
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.27269325  0.04351158
sample estimates:
       cor 
-0.1175698 
names( cor_model ) 
[1] "statistic"   "parameter"   "p.value"     "estimate"    "null.value" 
[6] "alternative" "method"      "data.name"   "conf.int"   

Formatting as Text

# Format the correlation estimate and its p-value (4 significant digits each)
# as a two-line label. paste0() concatenates without a separator, which is
# what paste(..., sep = "") was doing.
cor.text <- paste0( "r = ", 
                    format( cor_model$estimate, digits=4), 
                    ";\n P = ", 
                    format( cor_model$p.value, digits=4 ) ) 
cor.text
[1] "r = -0.1176;\n P = 0.1519"

 

# annotate() draws the label once at a fixed position. geom_text() with
# constant aesthetics would inherit the iris data from `p` and draw the same
# label once per row, overplotting it 150 times.
p + annotate( "text", x=7.25, y=4.25, label=cor.text )

Labels

# Per-species centroids: the mean sepal length and width of each species,
# used later as label positions.
iris_Centroids <- iris |> 
  group_by( Species ) |> 
  summarize( Sepal.Length = mean(Sepal.Length), 
             Sepal.Width = mean(Sepal.Width))
iris_Centroids
# A tibble: 3 × 3
  Species    Sepal.Length Sepal.Width
  <fct>             <dbl>       <dbl>
1 setosa             5.01        3.43
2 versicolor         5.94        2.77
3 virginica          6.59        2.97

Labels

ggplot( iris, aes(Sepal.Length, Sepal.Width) ) + 
  geom_point( aes(color=Species) ) + 
  geom_text( aes(label=Species), data=iris_Centroids) 

Smart Labels

library( ggrepel )

 

ggplot( iris, aes(Sepal.Length, Sepal.Width) ) + 
  geom_point( aes(color=Species) ) + 
  geom_label_repel( aes(label=Species), data=iris_Centroids )

Remove Legend

# The centroid labels already identify each species, so the colour legend is
# redundant. "none" is the supported way to suppress a guide; passing FALSE
# is deprecated.
ggplot( iris, aes(Sepal.Length, Sepal.Width) ) + 
  geom_point( aes(color=Species) ) + 
  geom_label_repel( aes(label=Species), data=iris_Centroids ) + 
  guides( color = "none" ) + 
  theme_minimal()

Questions

If you have any questions, please feel free to post to the Canvas discussion board for the class, or drop me an email.

Peter Sellers looking bored